### load the data
setwd("/Users/sergazy/Downloads/Fall 2018 courses and all files/Eric's class/08:30:2018 Alternative estimators")
load("dataL1L2.Rdata")
ls()
## [1] "Area" "average_error1" "average_error2"
## [4] "average_error3" "average_error4" "beta1"
## [7] "beta2" "betaMLE" "con"
## [10] "D1" "D1_e" "D2"
## [13] "D2_e" "D2lik" "dev0"
## [16] "dev1" "diagonal" "Dlik"
## [19] "err" "error1" "error2"
## [22] "error3" "error4" "estimatedSigmaSq"
## [25] "eta_one" "eta_two" "eta1_true"
## [28] "eta1fit" "eta2_true" "eta2fit"
## [31] "F" "f_2" "f_value_2"
## [34] "f_value_e" "fc" "fc_value"
## [37] "fisher_info" "fit" "fit_H0"
## [40] "fit5" "fitc" "fitc2"
## [43] "fitH0" "fitZ2" "h_m"
## [46] "H0betaMLE" "height" "i"
## [49] "Illit" "Illit2" "Income"
## [52] "lik" "logArea" "mean_true"
## [55] "mean_true_vector" "mu_i" "muhat_i"
## [58] "Murder" "N" "NewtonRaphsonOptim"
## [61] "NROptimNoQuadEta1" "NROptimNoQuadEta2" "ones"
## [64] "optimized" "p_value_e" "Pop"
## [67] "predictedFromGLM" "predictedFromSLM" "predictionSimulation"
## [70] "pval1" "r" "refit3"
## [73] "residualsGLM" "residualsSLM" "RSS"
## [76] "RSS_H0" "RSS4" "RSS5"
## [79] "RSSc" "RSSc2" "sighatsq"
## [82] "sigma_glm" "sigma_i" "sigma_lm"
## [85] "sigma_true" "sigmahat_i" "sigmasquare"
## [88] "singingvoice" "SS_glm" "SS_lm"
## [91] "SSres" "SSresH0" "SSresH02"
## [94] "t" "T" "t1"
## [97] "T2" "v" "x"
## [100] "x1" "X1" "x2"
## [103] "X2" "xsquared" "y"
## [106] "Y" "y_sim" "y_sims_glm"
## [109] "y_sims_lm" "Z" "z1"
## [112] "z2" "Z2"
#a
fit=lm(Y~x1+x2) # in the question Y is dependent variable is given
summary(fit)
##
## Call:
## lm(formula = Y ~ x1 + x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5518 -1.3366 0.1614 1.3546 10.9500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.2550 1.1196 2.907 0.00555 **
## x1 -1.1369 0.1590 -7.150 4.85e-09 ***
## x2 1.8009 0.2932 6.143 1.64e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.79 on 47 degrees of freedom
## Multiple R-squared: 0.6653, Adjusted R-squared: 0.651
## F-statistic: 46.7 on 2 and 47 DF, p-value: 6.77e-12
resid<-fit$res
hist(resid)
qqnorm(resid)
u<-qqnorm(resid, plot= FALSE)
qqline(resid) # it looks a bit of.
#b
L1 = function(b,x1,x2,Y){
sum(abs(Y-b[1]-b[2]*x1-b[3]*x2))
}
par0=fit$coef #Estimator comes from least square
par=optim(par0,L1,gr=NULL,x1,x2,Y) # estimator comes from L1
par
## $par
## (Intercept) x1 x2
## 2.613557 -1.037200 1.887199
##
## $value
## [1] 117.1935
##
## $counts
## function gradient
## 266 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
#now compare with par and par0
# I am doing something here
X=cbind(1,x1,x2)
dim(X)
## [1] 50 3
X %*% as.matrix(par$par)
## [,1]
## [1,] -6.9459701
## [2,] -6.1897725
## [3,] -0.9818339
## [4,] 3.7647635
## [5,] 2.8429985
## [6,] -5.7717323
## [7,] 4.7146451
## [8,] 0.4380312
## [9,] 3.8230497
## [10,] 7.8173365
## [11,] 14.1231268
## [12,] -5.3258099
## [13,] -1.7146461
## [14,] 3.9720566
## [15,] -7.1711405
## [16,] 2.0589541
## [17,] 2.5577729
## [18,] 4.0088094
## [19,] 0.3219342
## [20,] -0.8355570
## [21,] -5.1616104
## [22,] 7.8719187
## [23,] -1.9167159
## [24,] 3.9194666
## [25,] -4.8364955
## [26,] 3.3485642
## [27,] 5.7473657
## [28,] 4.4916913
## [29,] 1.3412889
## [30,] -3.5367874
## [31,] -6.2153116
## [32,] 6.1140190
## [33,] 0.5588167
## [34,] -3.0514979
## [35,] -3.8661249
## [36,] -2.1059421
## [37,] 2.8109264
## [38,] -4.7110091
## [39,] 11.7033723
## [40,] 1.8591111
## [41,] 3.6940842
## [42,] -1.0406509
## [43,] -3.5759672
## [44,] 6.3762887
## [45,] -2.2509685
## [46,] 3.9570232
## [47,] 10.0364562
## [48,] -4.1892696
## [49,] -5.6742568
## [50,] 3.2143553
resid2 <- Y - X %*% as.matrix(par$par)
resid2 - resid
## [,1]
## [1,] -0.30630563
## [2,] -0.34176348
## [3,] -0.43082438
## [4,] 0.54175129
## [5,] -0.09429347
## [6,] -0.35282281
## [7,] 0.13895855
## [8,] 0.41428492
## [9,] 0.11574931
## [10,] 0.39741513
## [11,] -0.52380385
## [12,] -0.45493020
## [13,] 0.18603513
## [14,] 0.57080533
## [15,] -0.32453892
## [16,] 0.22320400
## [17,] 0.45140238
## [18,] 0.21368415
## [19,] 0.40224079
## [20,] 0.07414331
## [21,] -0.33901358
## [22,] 0.24018879
## [23,] -0.35201754
## [24,] 0.39694684
## [25,] -0.10475720
## [26,] 0.47198112
## [27,] -0.52303690
## [28,] 0.54030514
## [29,] 0.48060844
## [30,] -0.53969691
## [31,] -0.38589301
## [32,] 0.11120279
## [33,] -0.17591784
## [34,] -0.30617021
## [35,] -0.05674753
## [36,] -0.09831705
## [37,] 0.52815705
## [38,] -0.09487287
## [39,] -0.71916164
## [40,] -0.23211383
## [41,] 0.21613444
## [42,] -0.59660019
## [43,] -0.18291671
## [44,] 0.32647229
## [45,] -0.47330195
## [46,] 0.11265737
## [47,] 0.05744420
## [48,] -0.26116772
## [49,] -0.27097552
## [50,] 0.45731558
hist(resid2-resid)
mean(resid2 - resid)
## [1] -0.01745745
#this is over I don't think the above is necessary
#c
X=cbind(1,x1,x2)
M=1000
b1sim=matrix(0,M,3)
b2sim=matrix(0,M,3)
error1=0
error2=0
S=sqrt(sum(fit$res^2)/47)
for (i in 1:M){
Usim=rnorm(50)*S
Ysim= X%*%fit$coef + Usim
fitsim=lm(Ysim~x1+x2)
b2sim[i,]=fitsim$coef
parsim=optim(fitsim$coef,L1,gr=NULL,x1,x2,Ysim)
b1sim[i,]=parsim$par
error1=error1+sum((b1sim[i,]-fit$coef)^2)
error2=error2+sum((b2sim[i,]-fit$coef)^2)
}
average_error1=error1/1000
average_error2=error2/1000
#d
#bla bla # Prove it
#e
X=cbind(1,x1,x2)
M=1000
b3sim=matrix(0,M,3)
b4sim=matrix(0,M,3)
error3=0
error4=0
S=sqrt(sum(fit$res^2)/47)
for (i in 1:M){
Usim=rt(50,3)*S
Ysim= X%*%fit$coef + Usim
fitsim=lm(Ysim~x1+x2)
b4sim[i,]=fitsim$coef
parsim=optim(fitsim$coef,L1,gr=NULL,x1,x2,Ysim)
b3sim[i,]=parsim$par
error4=error4+sum((b4sim[i,]-fit$coef)^2)
error3=error3+sum((b3sim[i,]-fit$coef)^2)
}
average_error4=error4/1000 #this is L2
average_error3=error3/1000 #this is L1, this looks better which is very strange.
#f
X=cbind(1,x1,x2)
M=1000
b5sim=matrix(0,M,3)
b6sim=matrix(0,M,3)
count=0
beta_f=c(2,-1,1)
S=sqrt(sum(fit$res^2)/47)
for (i in 1:M){
Usim=rt(50,3)*S
Ysim= X%*%beta_f + Usim
fitsim=lm(Ysim~x1+x2)
b6sim[i,]=fitsim$coef #this is L2
parsim=optim(fitsim$coef,L1,gr=NULL,x1,x2,Ysim)
b5sim[i,]=parsim$par #this is L1
S_f=vcov(fitsim)
coef_f=fitsim$coef
con=c(0,0.7,0)
t_f=(coef_f[2]+1)/sqrt(t(con)%*%S_f%*%con)
p_f=2*(1-pt(t_f,50))
if (p_f < 0.05) {
count=count+1
}
else{
count=count+0
}
}
print(count)
## [1] 71